Interpretation
Contents
Interpretation#
Load Data#
import pandas as pd
# Load the aspect/description/sentiment table used by every later cell.
# The path is relative to the current working directory; the recorded run
# below failed with FileNotFoundError because 'data/tag_df_final.csv' was not
# present there.
df = pd.read_csv(filepath_or_buffer='data/tag_df_final.csv')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Input In [1], in <cell line: 3>()
1 import pandas as pd
----> 3 df = pd.read_csv('data/tag_df_final.csv')
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\io\parsers\readers.py:1735, in TextFileReader._make_engine(self, f, engine)
1733 if "b" not in mode:
1734 mode += "b"
-> 1735 self.handles = get_handle(
1736 f,
1737 mode,
1738 encoding=self.options.get("encoding", None),
1739 compression=self.options.get("compression", None),
1740 memory_map=self.options.get("memory_map", False),
1741 is_text=is_text,
1742 errors=self.options.get("encoding_errors", "strict"),
1743 storage_options=self.options.get("storage_options", None),
1744 )
1745 assert self.handles is not None
1746 f = self.handles.handle
File ~\anaconda3\envs\steam_nlp\lib\site-packages\pandas\io\common.py:856, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
851 elif isinstance(handle, str):
852 # Check whether the filename is to be opened in binary mode.
853 # Binary mode does not support 'encoding' and 'newline'.
854 if ioargs.encoding and "b" not in ioargs.mode:
855 # Encoding
--> 856 handle = open(
857 handle,
858 ioargs.mode,
859 encoding=ioargs.encoding,
860 errors=errors,
861 newline="",
862 )
863 else:
864 # Binary mode
865 handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'data/tag_df_final.csv'
Viewing the top 10 game aspects tagged with a non-neutral sentiment polarity reveals that ‘game’ has by far the highest frequency.
Given the lack of specific information this provides, it will be useful to exclude this token from subsequent charts.
# Ten aspects with the most non-neutral tags (sentiment != 0), most frequent
# first. 'Count' is the number of non-null descriptions recorded per aspect.
non_neutral = df.loc[df['sentiment'] != 0]
aspect_counts = non_neutral.groupby('aspect', as_index=False)['description'].count()
aspect_counts = aspect_counts.rename(columns={'description': 'Count'})
top_aspects = aspect_counts.sort_values(by='Count', ascending=False).head(10)
# Last expression of the cell: rendered as the output table.
top_aspects.reset_index(drop=True)
| | aspect | Count |
|---|---|---|
| 0 | game | 11527 |
| 1 | campaign | 2120 |
| 2 | people | 1172 |
| 3 | multiplayer | 1125 |
| 4 | map | 1065 |
| 5 | play | 875 |
| 6 | crash | 809 |
| 7 | player | 707 |
| 8 | war | 681 |
| 9 | system | 678 |
Aspect Frequency#
Charting the most frequent aspects tagged with a non-neutral score, it is clear how divisive many aspects of the game design are for the player base.
Several aspects feature in the top 20 of both sentiment lists, which is indicative of the ‘mixed’ review score that the game currently has on the Steam store.
While the campaign features prominently in both lists, the more than 1600 positive references far outstrip the number of negative references made in user reviews.
Similarly, the more than 350 negative references to the game’s multiplayer aspect are outweighed by almost 800 positive references.
The remainder of the overlapping aspects of design seem far more divisive and demonstrate more even splits in player opinion.
#%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style('whitegrid')

# Count positive-sentiment tags per aspect, leaving out the uninformative
# 'game' token, and keep the 20 most frequently tagged aspects. After
# .count() the 'sentiment' column holds the number of tags, not a score.
pos = (df[(df['sentiment'] > 0) & (df['aspect'] != 'game')]
       .groupby('aspect')[['sentiment']]
       .count()
       .sort_values(by='sentiment', ascending=False)
       .reset_index()
       .head(20))

# Horizontal bar chart, one bar per aspect. `color` gives every bar the same
# fill; passing a one-element `palette` without `hue` is deprecated in seaborn
# and emits a warning.
sns.catplot(data=pos,
            y='aspect',
            x='sentiment',
            kind='bar',
            color='#88D8B0',
            height=6,
            aspect=1.5)
plt.title('Positive Game Aspects', fontsize=14)
plt.tick_params(labelsize=12)
plt.ylabel('Descriptor', fontsize=12)
plt.xlabel('Count', fontsize=12)
# Fixed tick range (0..1700 in steps of 100), identical to the negative chart
# so the two figures share an x scale.
plt.xticks(list(range(0, 1701, 100)))
plt.tight_layout()
plt.show()
# Count negative-sentiment tags per aspect, leaving out the uninformative
# 'game' token, and keep the 20 most frequently tagged aspects. After
# .count() the 'sentiment' column holds the number of tags, not a score.
neg = (df[(df['sentiment'] < 0) & (df['aspect'] != 'game')]
       .groupby('aspect')[['sentiment']]
       .count()
       .sort_values(by='sentiment', ascending=False)
       .reset_index()
       .head(20))

# Horizontal bar chart, one bar per aspect. `color` gives every bar the same
# fill; passing a one-element `palette` without `hue` is deprecated in seaborn
# and emits a warning.
sns.catplot(data=neg,
            y='aspect',
            x='sentiment',
            kind='bar',
            color='#FF6F69',
            height=6,
            aspect=1.5)
plt.title('Negative Game Aspects', fontsize=14)
plt.tick_params(labelsize=12)
plt.ylabel('Descriptor', fontsize=12)
plt.xlabel('Count', fontsize=12)
# Fixed tick range (0..1700 in steps of 100), identical to the positive chart
# so the two figures share an x scale.
plt.xticks(list(range(0, 1701, 100)))
plt.tight_layout()
plt.show()
import numpy as np
import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')
hv.output(size=300)

# Every aspect that appears in the positive or the negative top-20 list.
aspect_list = set(pos['aspect'].to_list()).union(neg['aspect'].to_list())

# Rows of df whose aspect is one of those top aspects.
in_top_aspects = df['aspect'].isin(aspect_list)

# Number of tagged rows per description among the top aspects, largest first.
desc_counts = (
    df[in_top_aspects]
    .groupby(by=['description'])[['sentiment']]
    .count()
    .rename(columns={'sentiment': 'Count'})
    .sort_values(by='Count', ascending=False)
    .reset_index()
)

# Keep only the rows whose description occurs at least 150 times.
frequent_descriptions = desc_counts.loc[desc_counts['Count'] > 149, 'description']
top_df = df[in_top_aspects & df['description'].isin(frequent_descriptions)]

# One row per (aspect, description) pair with the number of co-occurrences.
links = (
    top_df
    .groupby(by=['aspect', 'description'], as_index=False)[['sentiment']]
    .count()
    .rename(columns={'sentiment': 'Count'})
)

# Node names: aspects first, then descriptions.
# NOTE(review): a token that is both an aspect and a description ends up in
# the list twice, because the two sets are concatenated rather than merged -
# confirm this is intended.
aspect_tokens = list(set(links['aspect'].tolist()))
description_tokens = list(set(links['description'].tolist()))
nodes = hv.Dataset(pd.DataFrame(aspect_tokens + description_tokens, columns=['Token']))
def rotate_label(plot, element):
    """Plot hook that pads the chord labels and flips the upside-down ones.

    Labels whose angle lies outside [-pi/2, pi/2] are rotated by a further
    half turn and get the padding appended; all other labels get the padding
    prepended. The padding side is decided per label position from the angle
    array, so two labels with the same text on opposite sides of the circle
    are each handled correctly.

    Parameters
    ----------
    plot
        Object exposing ``handles['text_1_source']`` (with ``data['angle']``
        as a NumPy array and ``data['text']``) and ``handles['text_1_glyph']``.
    element
        Unused; present because hooks are called with ``(plot, element)``.

    Returns
    -------
    None
        ``data['text']`` is replaced, ``data['angle']`` is modified in place
        and the glyph's ``text_align`` is set to ``"center"``.
    """
    white_space = " "
    quarter_turn = 1.5707963267949
    half_turn = 3.1415926535898

    source = plot.handles['text_1_source']
    angles = source.data['angle']

    # True where the label would otherwise be drawn upside down.
    flipped = (angles < -quarter_turn) | (angles > quarter_turn)

    source.data['text'] = np.array([
        label + white_space if is_flipped else white_space + label
        for label, is_flipped in zip(source.data['text'], flipped)
    ])

    # In-place update so the data source sees the corrected angles.
    angles[flipped] += half_turn
    plot.handles['text_1_glyph'].text_align = "center"
# Build the chord diagram from the aspect/description links, selecting the
# Count range that starts at 25.
chord = hv.Chord((links, nodes)).select(Count=(25, None))
# Styling: nodes are labelled and coloured by 'Token', edges are coloured by
# their 'aspect', and the rotate_label hook adjusts label padding and angles.
# The title previously read 'Reltionships'.
chord.opts(
    opts.Chord(title='Relationships Between Aspects and Tokens', labels='Token', label_text_font_size='12pt',
               node_color='Token', node_cmap=['#c1c1c1', '#adadad'], node_size=10,
               edge_color='aspect', edge_cmap=['#FF6F69', '#88D8B0', '#ffcc5c'],
               hooks=[rotate_label], edge_alpha=0.8, edge_line_width=1)
)
```python
label_data = chord.nodes.data
label_data['rotation'] = np.arctan((label_data.y / label_data.x))
label_data['y'] = label_data['y'].apply(lambda x: x * 1.1)
label_data['x'] = label_data['x'].apply(lambda x: x * 1.1)
labels = hv.Labels(label_data)
labels.opts(
    opts.Labels(cmap='magma', text_font_size='10pt', padding=0.08, angle=dim('rotation') * 1260/22))
chord * labels
```
# Count every (aspect, description) pair among non-neutral rows of the top
# aspects, ignoring the 'game' token; most frequent pairs first.
is_candidate = (
    (df['sentiment'] != 0)
    & (df['aspect'] != 'game')
    & df['aspect'].isin(aspect_list)
)
counts_df = (
    df[is_candidate]
    .groupby(by=['aspect', 'description'])[['sentiment']]
    .count()
    .rename(columns={'sentiment': 'Count'})
    .sort_values(by='Count', ascending=False)
    .reset_index()
)

# Keep pairs that occur at least 20 times.
counts_df = counts_df[counts_df['Count'] >= 20]

# Rows of df restricted to the aspects and descriptions that survived the
# threshold (both sets are drawn from df itself).
kept_aspects = set(counts_df['aspect'])
kept_descriptions = set(counts_df['description'])
df_adj = df[df['aspect'].isin(kept_aspects) & df['description'].isin(kept_descriptions)]

# adj1: descriptions as rows, aspects as columns, expanded to a square frame
# over the union of both label sets (cells with no data become NaN).
adj1 = pd.crosstab(df_adj['description'], df_adj['aspect'])
idx = adj1.columns.union(adj1.index)
adj1 = adj1.reindex(index=idx, columns=idx)

# adj2: the same counts with the roles of rows and columns swapped.
adj2 = pd.crosstab(df_adj['aspect'], df_adj['description'])
idx = adj2.columns.union(adj2.index)
adj2 = adj2.reindex(index=idx, columns=idx)

# Fill the gaps of adj1 from adj2 so both orientations carry the counts.
adj = adj1.fillna(adj2)

links = adj.to_numpy()
nodes = adj.columns.tolist()
import mne
from mne.viz import circular_layout
from mne_connectivity.viz import plot_connectivity_circle
# Two mirrored arcs of node angles (presumably degrees): half of the nodes
# spread evenly over start..end shifted by +90, the other half shifted by -90
# and listed in reverse.
start, end = 45, 135
half_count = len(nodes) // 2
arc = np.linspace(start, end, half_count)
first_half = (arc + 90).astype(int) % 360
second_half = (arc - 90).astype(int)[::-1] % 360
node_angles = np.array([*first_half, *second_half])

# NOTE(review): node_angles is not passed to the plot call below, so the
# function's own default layout is used - confirm whether that is intended.
fig, ax = plt.subplots(figsize=(20, 20), facecolor='black', subplot_kw={'polar': True})
plot_connectivity_circle(links, nodes, ax=ax, interactive=True)
fig.tight_layout()
# Rows whose aspect appears in the positive or the negative top-20 list.
in_top_lists = df['aspect'].isin(pos['aspect']) | df['aspect'].isin(neg['aspect'])

# Occurrences of every (aspect, description) pair, most frequent first.
pair_counts = (
    df[in_top_lists]
    .groupby(by=['aspect', 'description'])[['sentiment']]
    .count()
    .rename(columns={'sentiment': 'Count'})
    .sort_values(by='Count', ascending=False)
    .reset_index()
)

# Keep pairs that occur at least 20 times.
top_df = pair_counts[pair_counts['Count'] >= 20]

# Rows of df restricted to the aspects and descriptions present in top_df
# (both sets are drawn from df itself).
kept_aspects = set(top_df['aspect'])
kept_descriptions = set(top_df['description'])
df_adj = df[df['aspect'].isin(kept_aspects) & df['description'].isin(kept_descriptions)]

# adj1: descriptions as rows, aspects as columns, expanded to a square frame
# over the union of both label sets (cells with no data become NaN).
adj1 = pd.crosstab(df_adj['description'], df_adj['aspect'])
idx = adj1.columns.union(adj1.index)
adj1 = adj1.reindex(index=idx, columns=idx)

# adj2: the same counts with the roles of rows and columns swapped.
adj2 = pd.crosstab(df_adj['aspect'], df_adj['description'])
idx = adj2.columns.union(adj2.index)
adj2 = adj2.reindex(index=idx, columns=idx)

# Fill the gaps of adj1 from adj2 so both orientations carry the counts.
adj = adj1.fillna(adj2)

# Connection strengths as a plain array, plus the label of each row/column.
links = adj.to_numpy()
nodes = adj.columns.tolist()
# `links` is indexed by `nodes` (the row/column labels of the adjacency
# matrix), so the names handed to the layout and plotting functions must be
# that same ordered list. An unordered set of the same tokens would attach
# labels to the wrong rows and columns.
label_names = nodes

# Aspects form one group and the remaining tokens the other. A token that is
# both an aspect and a description is kept on the aspect side only, because
# node_order has to list every node exactly once.
aspect_tokens = set(top_df['aspect'])
lh_labels = [name for name in label_names if name in aspect_tokens]
rh_labels = [name for name in label_names if name not in aspect_tokens]
node_order = list()
node_order.extend(lh_labels[::-1])  # reverse the order
node_order.extend(rh_labels)

# First figure: layout starting at 270 degrees, groups split after the aspects.
# (An earlier start_pos=90 layout was computed and immediately overwritten, so
# it has been dropped.)
node_angles = circular_layout(label_names, node_order, start_pos=270,
                              group_boundaries=[0, len(lh_labels)])
fig, axes = plot_connectivity_circle(links, label_names,
                                     node_angles=node_angles)

# Second figure: same grouping, layout starting at 0 degrees.
node_angles = circular_layout(label_names, node_order, start_pos=0,
                              group_boundaries=[0, len(lh_labels)])
fig, axes = plot_connectivity_circle(links, label_names,
                                     node_angles=node_angles)
# Rows whose aspect appears in the positive or the negative top-20 list.
in_top_lists = df['aspect'].isin(pos['aspect']) | df['aspect'].isin(neg['aspect'])

# Number of tagged rows per description among those rows, largest first.
description_counts = (
    df[in_top_lists]
    .groupby(by=['description'])[['sentiment']]
    .count()
    .rename(columns={'sentiment': 'Count'})
    .sort_values(by='Count', ascending=False)
    .reset_index()
)

# Keep rows of the top aspects whose description occurs at least 100 times.
frequent_descriptions = description_counts.loc[description_counts['Count'] > 99, 'description']
top_df = df[df['aspect'].isin(aspect_list) & df['description'].isin(frequent_descriptions)]

# Rows of df restricted to the aspects and descriptions present in top_df
# (both sets are drawn from df itself).
kept_aspects = set(top_df['aspect'])
kept_descriptions = set(top_df['description'])
df_adj = df[df['aspect'].isin(kept_aspects) & df['description'].isin(kept_descriptions)]

# adj1: descriptions as rows, aspects as columns, expanded to a square frame
# over the union of both label sets (cells with no data become NaN).
adj1 = pd.crosstab(df_adj['description'], df_adj['aspect'])
idx = adj1.columns.union(adj1.index)
adj1 = adj1.reindex(index=idx, columns=idx)

# adj2: the same counts with the roles of rows and columns swapped.
adj2 = pd.crosstab(df_adj['aspect'], df_adj['description'])
idx = adj2.columns.union(adj2.index)
adj2 = adj2.reindex(index=idx, columns=idx)

# Fill the gaps of adj1 from adj2 so both orientations carry the counts.
adj = adj1.fillna(adj2)

# Connection strengths as a plain array, plus the label of each row/column.
links = adj.to_numpy()
nodes = adj.columns.tolist()
# `links` is indexed by `nodes` (the row/column labels of the adjacency
# matrix), so the names handed to the layout and plotting functions must be
# that same ordered list. An unordered set of the same tokens would attach
# labels to the wrong rows and columns.
label_names = nodes

# Aspects form one group and the remaining tokens the other. A token that is
# both an aspect and a description is kept on the aspect side only, because
# node_order has to list every node exactly once.
aspect_tokens = set(top_df['aspect'])
lh_labels = [name for name in label_names if name in aspect_tokens]
rh_labels = [name for name in label_names if name not in aspect_tokens]
node_order = list()
node_order.extend(lh_labels[::-1])  # reverse the order
node_order.extend(rh_labels)

# Layout starting at 270 degrees, groups split after the aspects. (An earlier
# start_pos=90 layout was computed and immediately overwritten, so it has
# been dropped.)
node_angles = circular_layout(label_names, node_order, start_pos=270,
                              group_boundaries=[0, len(lh_labels)])
fig, axes = plot_connectivity_circle(links, label_names,
                                     node_angles=node_angles)